Export and plot results from MCMC output

This notebook loads the MCMC output from the script estimate_models.r and calculates

Note that the plots below show the results for the entire election campaign, i.e. up until election day. The results written to csv are filtered to only include win probabilities and mean expected vote shares until the last day for which polls are available in a given scenario.

Libraries, functions

library(tictoc)
library(ggplot2)
library(dplyr)
library(data.table)
names_functions = list.files(here::here("functions"))
for (f in names_functions)
    source(here::here("functions", f))
rm(f, names_functions)

Load “global” variables

parties <- load_parties()
states <- load_states()
regions <- load_regions()
states_regions <- load_dataland_states_regions()
n_parties_by_state <- load_n_parties_by_geography("state")
dates_campaign <- load_dates_election_campaign(year = 2024)
electoral_votes <- load_electoral_votes()
election_day <- load_election_day()

Load priors -> visualize along with posterior distribution of \pi_T

priors <- readRDS(here::here("priors", "priors.Rds"))

Generate results for different scenarios

scenarios <- load_scenarios()

List to store plots

plts <- list()

Define a function to generate csv output and plots for each scenario

gen_results <- function(plts, scen) {
    print(scen)
    out <- readRDS(here::here("model", paste0("mcmc_out_", scen, ".Rds")))

    df_polls <- read.csv(here::here("data", paste0("df_polls_", scen, ".csv")))
    df_polls$date <- as.Date(df_polls$date)
    # make non-zero polls more visible in plots
    df_polls$value = ifelse(df_polls$value == 0, 
                            NA, 
                            df_polls$value) 

    df_prior_ppi <- invert_alr_on_prior(priors[[scen]][["m_mmu_T"]])

    # use data.table -> much, much quicker than tidyverse!
    df_draws_ppi <- convert_draws_to_dt(
        rstan::extract(out, pars = "ppi")[["ppi"]],                        
        geographies = states,
        parties = parties,
        dates_campaign = dates_campaign)
    
    df_draws_ppi_reg <- convert_draws_to_dt(
        rstan::extract(out, pars = "ppi_reg")[["ppi_reg"]],
        geographies = regions,
        parties = parties,
        dates_campaign = dates_campaign)

    df_draws_ppi_nat <- convert_draws_to_dt(
        rstan::extract(out, pars = "ppi_nat")[["ppi_nat"]],                        
        geographies = "national",
        parties = parties, 
        dates_campaign = dates_campaign)

    tic("calc prob win election")
    df_prob_win_election <- do.call(
        "rbind", 
        lapply(
            dates_campaign, 
            FUN = calc_prob_win_election,
            df_draws_ppi = df_draws_ppi,
            df_draws_ppi_nat = df_draws_ppi_nat,
            states = states,
            parties = parties,
            electoral_votes = electoral_votes
            )
        )
    toc()
    tic("calc prob win states")
    df_prob_win_states <- do.call(
        "rbind", 
        lapply(
            dates_campaign, 
            FUN = calc_prob_win_states,
            df_draws_ppi = df_draws_ppi,
            states = states,
            parties = parties
            )
        ) 
    toc()

    # Export mean vote share and win probabilities to read 
    tic("calc mean vote states")
    df_draws_ppi %>% 
    group_by(t, party, geography) %>% 
    summarise(mean_vote_share = mean(values)) %>%
    rename(date = t, province = geography) %>%
    mutate(party = tolower(party)) %>%
    tidyr::pivot_wider(
        names_from = "party", 
        values_from = "mean_vote_share",
        names_glue = "{party}_{.value}") -> df_out_mean
    toc()
    
    df_prob_win_states %>%
    mutate(party = tolower(party)) %>%
    rename(province = geography) %>%
    tidyr::pivot_wider(
        names_from = "party", 
        values_from = "prob_win",
        names_glue = "{party}_{.value}") -> df_out_prob_win_states
    
    write.csv(
        merge(
            # only export until day of latest available poll
            filter(df_out_mean, date <= max(df_polls$date)), 
            filter(df_out_prob_win_states, date <= max(df_polls$date)),
            by = c("date", "province")
        ), 
        file = here::here(paste0("provincial_forecast_", scen, ".csv")),
        row.names = FALSE
    )
    tic("calc mean vote share national")
    df_draws_ppi_nat %>% 
        select(-geography) %>%
        group_by(t, party) %>% 
        summarise(mean_vote_share = mean(values)) %>%
        rename(date = t) %>%
        mutate(party = tolower(party)) %>%
        tidyr::pivot_wider(
            names_from = "party", 
            values_from = "mean_vote_share",
            names_glue = "{party}_{.value}") -> df_out_mean_nat
    toc()
    tic("transform win prob election for export")
    df_prob_win_election %>%
        mutate(party = tolower(party)) %>%
        tidyr::pivot_wider(
            names_from = "party", 
            values_from = "prob_win",
            names_glue = "{party}_{.value}") -> df_out_prob_win_election
    toc()
    
    write.csv(
        merge(
            # only export until day of latest available poll
            filter(df_out_mean_nat, date <= max(df_polls$date)), 
            filter(df_out_prob_win_election, date <= max(df_polls$date)), 
            by = "date"
        ), 
        file = here::here(paste0("national_forecast_", scen, ".csv")),
        row.names = FALSE
    )

    # Plots    
    
    plot_prob_win_election(
        df_prob_win_election, 
        election_day, 
        scen) -> plts[[scen]][["plt_prob_win_election"]]

    tic("plot win prob election over time")
    plot_prob_win_election_over_time(
        df_prob_win_election, 
        scen) -> plts[[scen]][["plt_prob_win_election_over_time"]]
    toc()
    plot_prob_win_states(
        df_prob_win_states,
        election_day,
        scen) -> plts[[scen]][["plt_prob_win_states"]]

    plot_prob_win_states_over_time(
        df_prob_win_states, 
        scen) -> plts[[scen]][["plt_prob_win_states_over_time"]]

    plot_ppiT(
        df_draws_ppi, 
        df_prior_ppi, 
        election_day, 
        n_geographies = length(states), 
        scen) -> plts[[scen]][["plt_ppiT"]]
    
    plot_ppi(
        df_draws_ppi, 
        filter(df_polls, scenario == scen), 
        n_geographies = length(states), 
        type_of_poll = "state", 
        plt_title_prefix = scen) -> plts[[scen]][["plt_ppi"]]

    plot_ppi(
        df_draws_ppi_reg, 
        filter(df_polls, scenario == scen), 
        n_geographies = length(regions), 
        type_of_poll = "regional", 
        plt_title_prefix = scen) -> plts[[scen]][["plt_ppi_reg"]]    
    

    plot_ppi(
        df_draws_ppi_nat, 
        filter(df_polls, scenario == scen), 
        n_geographies = 1, 
        type_of_poll = "national", 
        plt_title_prefix = scen) -> plts[[scen]][["plt_ppi_nat"]]
    rm(df_draws_ppi, 
        df_draws_ppi_reg, 
        df_draws_ppi_nat, 
        df_polls,
        df_prior_ppi,
        df_prob_win_election,
        df_prob_win_states,
        df_out_prob_win_election,
        df_out_prob_win_states,
        df_out_mean, 
        df_out_mean_nat,
        out)
    plts 
}

Loop over scenarios

for (scen in scenarios) {
    plts <- gen_results(plts, scen)
}
[1] "A"
calc prob win election: 210.75 sec elapsed
calc prob win states: 194.59 sec elapsed
calc mean vote states: 0.39 sec elapsed
calc mean vote share national: 0.07 sec elapsed
transform win prob election for export: 0.01 sec elapsed
plot win prob election over time: 0.02 sec elapsed
[1] "B"
calc prob win election: 315.86 sec elapsed
calc prob win states: 227.36 sec elapsed
calc mean vote states: 0.53 sec elapsed
calc mean vote share national: 0.08 sec elapsed
transform win prob election for export: 0.01 sec elapsed
plot win prob election over time: 0.04 sec elapsed
[1] "C"
calc prob win election: 278.38 sec elapsed
calc prob win states: 252.42 sec elapsed
calc mean vote states: 0.47 sec elapsed
calc mean vote share national: 0.11 sec elapsed
transform win prob election for export: 0.01 sec elapsed
plot win prob election over time: 0.02 sec elapsed
[1] "D"
calc prob win election: 324.98 sec elapsed
calc prob win states: 195.61 sec elapsed
calc mean vote states: 0.56 sec elapsed
calc mean vote share national: 0.1 sec elapsed
transform win prob election for export: 0.03 sec elapsed
plot win prob election over time: 0.02 sec elapsed
[1] "E"
calc prob win election: 216.69 sec elapsed
calc prob win states: 282.51 sec elapsed
calc mean vote states: 0.72 sec elapsed
calc mean vote share national: 0.13 sec elapsed
transform win prob election for export: 0.01 sec elapsed
plot win prob election over time: 0.02 sec elapsed

Plot results